# -*- coding:utf-8 -*-
# @Date      :2021/5/5
# @Author    :Maoxian
# Re-implementation of the previous crawler, this time using a thread pool

import os
import time
from multiprocessing.dummy import Pool

import requests
from lxml import etree

# Listing page for the free resume templates.
base_url = "https://sc.chinaz.com/jianli/free.html"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'
}

# Fetch the listing page once at startup and collect every template's
# detail-page URL. The hrefs are presumably protocol-relative (start with
# "//"), hence the 'https:' prefix — TODO confirm against the live page.
response = requests.get(base_url, headers=headers)
html = etree.HTML(response.text)
raw_hrefs = html.xpath('//*[@id="container"]/div/a/@href')
page_links = [f'https:{href}' for href in raw_hrefs]


def download(link):
    """Download one resume template from its detail page into ./files.

    Fetches the detail page at ``link``, extracts the template's title and
    the first mirror's file URL, downloads the file, and writes it to
    ``files/<title>.<ext>``. Prints progress messages to stdout.

    :param link: absolute URL of a template's detail page.
    :raises requests.HTTPError: if either HTTP request returns an error status.
    :raises IndexError: if the expected title/link nodes are missing from the page.
    """
    res = requests.get(link, headers=headers, timeout=30)  # timeout so a stuck server can't hang a worker thread
    res.raise_for_status()  # fail fast on HTTP errors instead of XPath-ing an error page
    res.encoding = 'utf8'  # force UTF-8 decoding so the Chinese title text is extracted correctly
    page_html = etree.HTML(res.text)
    down_title = page_html.xpath('//div[@class="ppt_tit clearfix"]/h1/text()')[0]  # template title
    down_title = down_title.strip().replace('下载', '')  # drop the trailing "download" word from the heading
    down_link = page_html.xpath('//*[@id="down"]/div[2]/ul/li[1]/a/@href')[0]  # first download mirror's file URL
    down_suffix = down_link.split('.')[-1]  # file extension, e.g. 'rar' or 'zip' — assumes the URL has no query string

    resp = requests.get(down_link, headers=headers, timeout=60)
    resp.raise_for_status()
    down_content = resp.content  # raw file bytes

    # makedirs(exist_ok=True) is atomic w.r.t. concurrent callers: this function
    # runs on several pool threads at once, and the old exists()+mkdir() pair
    # could race and raise FileExistsError on the second thread.
    os.makedirs('files', exist_ok=True)

    print(f'《{down_title}》开始下载')
    with open(f'files/{down_title}.{down_suffix}', 'wb') as f:  # save the file under its page title
        f.write(down_content)
    print(f'《{down_title}》下载完成')


# Fan the downloads out over five worker threads
# (multiprocessing.dummy.Pool is a thread pool, so the blocking
# network I/O in download() overlaps) and time the whole run.
t0 = time.time()

with Pool(5) as worker_pool:
    worker_pool.map(download, page_links)

elapsed = time.time() - t0
print(f"程序共耗时：{elapsed}")
